import json
import os.path
from datetime import timedelta
from urllib.parse import urljoin

import scrapy
from scrapy import Request
from scrapy.utils.project import get_project_settings

from doc.ouyeel_cps import Querys
from ouyeelMall.api.dicts.mall_dict import iron_catid_dict
from ouyeelMall.items import MallProductItem


class OuyeelSpider(scrapy.Spider):
    """Spider for ouyeel.com mall listing pages.

    Emits one listing request per prepared (channel, productType, shopSign)
    combination from ``Querys`` and hands each response to the module-level
    ``parse_list`` helper for item extraction.
    """
    name = 'ouyeel'
    allowed_domains = ['ouyeel.com']
    start_urls = ['https://www.ouyeel.com/']

    settings = get_project_settings()

    # NOTE(review): "+ 10000" widens the crawl window to ~27 years —
    # presumably a deliberate full-history switch; confirm before removing.
    CRAWL_CYCLE = settings.get("CRAWL_CYCLE", default=1) + 10000
    limit_day = settings.get("TODAY") - timedelta(days=CRAWL_CYCLE - 1)  # str(datetime.now())[:10]

    # channel / productType / shopSign combinations prepared in advance
    querys = Querys

    # dup_file = os.path.join(ProjectPath, "doc/duplicate_urls.txt")
    # with open(dup_file, 'r', encoding="U8") as f:
    #     duplicated = {l.strip() for l in f.readlines()}

    def start_requests(self):
        """Yield one listing request per channel/productType/shopSign combo."""
        for entry in self.querys:
            channel_code, channel_name = entry['code'], entry['name']
            for product in entry['productType']:
                type_value, type_name = product['value'], product['name']
                signs = product['shop_signs']
                if not signs:
                    # No shop signs configured for this product type: one
                    # request without the shopSign query parameter.
                    url = f"https://www.ouyeel.com/search-ng/queryResource/indexxhzy?productType={type_value}&channel={channel_code}"
                    yield Request(url=url, callback=self.parse_list, dont_filter=True,
                                  meta={"channal_name": channel_name, "product_type": type_name, "shop_sign": ''},
                                  )
                    continue
                for sign in signs:
                    url = f"https://www.ouyeel.com/search-ng/queryResource/indexxhzy?" \
                          f"productType={type_value}&shopSign={sign}&channel={channel_code}"
                    yield Request(url=url, callback=self.parse_list, dont_filter=True,
                                  meta={"channal_name": channel_name, "product_type": type_name, "shop_sign": sign},
                                  )

    def parse(self, response, **kwargs):
        """Unused default callback; all requests route to :meth:`parse_list`."""
        pass

    def parse_list(self, response, **kwargs):
        """Delegate listing-page extraction to the module-level helper.

        :param response: listing-page response carrying the query context
            in ``response.meta``
        :return: generator of item value payloads
        """
        meta = response.meta
        yield from parse_list(
            response,
            meta['channal_name'],
            meta['product_type'],
            meta['shop_sign'],
            limit_day=self.limit_day,
        )

        # with open(self.dup_file, 'a+', encoding="U8") as f:
        #     f.write(my_hash(response.url))
        #     f.write("\n")


def parse_list(response, channal_name, product_type, shop_sign, limit_day):
    """Extract mall product items from an ouyeel listing page.

    :param response: listing-page response whose HTML contains the product divs
    :param channal_name: channel display name; primary category lookup key
    :param product_type: product-type display name; fallback category lookup key
    :param shop_sign: shop sign from the originating query; empty string means
        "read it from each product's own data" (resolved per product)
    :param limit_day: oldest acceptable creation date; older products skipped
    :return: generator of ``MallProductItem.values`` payloads
    """
    limit_str = str(limit_day)  # hoisted: compared against createDate[:10] per item
    product_divs = response.css('#search_result_content>div.searchcenter_result_content>div.tab_content>div.oy-list'
                                '>div[class^="oylistItem"]>div[class^="oylistItem"]>div[class^="oylistItem"]')
    for prod_div in product_divs:
        more_data = prod_div.css('div[class^="main_center"] a[class^="titleClickArea"]::attr(o-data)').get()
        try:
            more_data = json.loads(more_data)
        except (TypeError, ValueError):
            # Missing (None -> TypeError) or malformed (JSONDecodeError, a
            # ValueError subclass) o-data attribute: not a real product div.
            continue

        create_date = more_data.get('createDate')
        if not create_date or create_date[:10] < limit_str:
            continue

        # Price: resources without any price are not formally listed yet; skip.
        price = (more_data.get("basicPrice")
                 or more_data.get("publishPrice")
                 or more_data.get("minPrice", 0))
        if not price:
            continue

        # BUG FIX: resolve the shop sign into a per-product local. The original
        # rebound the `shop_sign` parameter itself, so when the caller passed an
        # empty sign, the first product's sign leaked into every later product
        # on the same page.
        item_shop_sign = shop_sign or more_data.get("shopSignStandard", more_data.get("shopSign", ""))

        # Category: channel name first, then product type, then the default id.
        catid = iron_catid_dict.get(channal_name, iron_catid_dict.get(product_type, 46855))
        mallItem = MallProductItem(cat_id=catid)

        title_texts = prod_div.css(
            'div[class^="main_center_title"] span[class^="main_center_titleItem"]::text').getall()
        mallItem.title = ' '.join(t.strip() for t in title_texts) + more_data.get("shopSign", "")
        mallItem.amount = 1
        mallItem.unit = more_data.get("quantityUnit", "件")
        mallItem.brand = item_shop_sign

        href = prod_div.css('div[class^="main_center"] a[class^="titleClickArea"]::attr(href)').get()
        mallItem.sourceurl = urljoin("https://www.ouyeel.com", href)

        img_src = prod_div.css('div[class^="imageArea_imgWrapper"] img::attr(src)').get()
        if img_src:
            mallItem.thumb = urljoin("https://www.ouyeel.com", img_src)

        mallItem.addtime = create_date

        mallItem.pname = more_data.get("storeCityName", "")

        mallItem.step_p1 = float(price)

        # Key/value rows rendered into an HTML detail table below. Keys are
        # Chinese display labels; some values are looked up by Chinese keys
        # because the site's o-data uses them directly.
        content_detail = {
            "牌号": item_shop_sign,
            "重量": str(more_data.get("weight", more_data.get("balanceWeight", ""))) + " 吨/件",
            "规格详情": more_data.get("specComment", ""),
            "捆包号": more_data.get("packCode", ""),
            "钢厂资源号": more_data.get("factoryResCode", ""),
            "产地": more_data.get("manufactureName", ""),
            "存放地": more_data.get("storeCityName", ""),
            "仓库": more_data.get("warehouseName", ""),
            "库位": more_data.get("库位", ""),
            "生产日期": more_data.get("manufactureDate", ""),
            "入库日期": more_data.get("putinDate", ""),
            "镀层种类": more_data.get("coatingType", ""),
            "技术标准": more_data.get("techStandard", ""),
            "质量等级": more_data.get("qualityGradeName", ""),
            "质量缺陷": more_data.get("qualityDefects", ""),
            "镀层含量": more_data.get("镀层含量", ""),
            "颜色": more_data.get("color", ""),
            "表面结构": more_data.get("表面结构", ""),  # coatingStructure
            "包装方式": more_data.get("packingTypeName", ""),
            "边部状态": more_data.get("边部状态", ""),
            "面漆种类": more_data.get("paintType", ""),
            "表面质量": more_data.get("表面质量", ""),
            "合金状态": more_data.get("合金状态", ""),
            "质保书": more_data.get("warrantyPaid", ""),
            "原卷原标签": more_data.get("原卷原标签", ""),
            "表面处理": more_data.get("surfaceProcess", ""),
            "特殊说明": more_data.get("specialComments", ""),
        }
        # content_storage = {
        #     "仓储": more_data.get("warehouseName", ""),
        #     "地址": more_data.get("地址", ""),
        #     "联系人": more_data.get("contactName", ""),
        #     "联系电话": more_data.get("contactPhone", ""),
        #     "当前业务情况": more_data.get("deliveryCycle2", ""),
        #     "支持运输方式": more_data.get("transType", ""),
        #     # "友情提醒": more_data.get("specComment", ""),
        #     # "设备名称": more_data.get("specComment", ""),
        #     # "加工类型": more_data.get("specComment", ""),
        #     # "加工品种": more_data.get("specComment", ""),
        #     "备注信息": more_data.get("specialRemarks", ""),
        # }
        rows = ''.join(f"<tr><td>{k}</td><td>{v}</td></tr>" for k, v in content_detail.items())
        mallItem.content = f"<table>{rows}</table>"

        mallItem.set_defaults(skip_time=True)
        yield mallItem.values
